

set matsize 11000
clear all

cd \Data


********************************************************************************
** The following do file produces the estimates of Pooled Model (Equation (6)).
** The resulting estimates are displayed in Figures 5 & 6 in th main text as
** well as Figures A6, A10, and Tables A12, A13, A14, & A16 in the Appendix.


use cons_data.dta, clear
sort premise_id
merge m:1 premise_id using premise_characteristics.dta
keep if _merge == 3
drop _merge

keep if (zoning == "R-1" | zoning == "RD 1" | zoning == "RD 2" | zoning == "RD 3" | zoning == "RD 4" | zoning == "RD 5" | zoning == "RD-1" | zoning == "RD-2" | zoning == "RD-3" | zoning == "RD-4" | zoning == "RD-5" | zoning == "RD1" | zoning == "RD2" | zoning == "RD3" | zoning == "RD4" | zoning == "RD5" | zoning == "RD5PD")
drop if bedrooms_num == 0 | bedrooms_num == .
drop if Resdnc_sqft == 0 | Resdnc_sqft == .
keep if year_built > 1974 & year_built < 1983

** Create premise-level controls for bedrooms (6 bins), square-footage (12 bins),
** and an indicator for multiple stories. Note, premises with missing Assessor
** information will be dropped.

drop if bedrooms_num == 0 | bedrooms_num == .
replace bedrooms_num = 6 if bedrooms_num > 6
drop if Resdnc_sqft == 0 | Resdnc_sqft == .
gen sqft = 0
replace sqft = 1 if Resdnc_sqft < 1000
forvalues i = 2/11 {
	replace sqft = `i' if Resdnc_sqft < 1000 + (`i' - 1) * 150 & Resdnc_sqft >= 1000 + (`i' - 2) * 150
}
replace sqft = 12 if Resdnc_sqft >= 2500
gen multi = 0
replace multi = 1 if stories > 1

** Generate week-of-year variable for two-way clustering

gen week_year = year*100 + week

xtset premise_id

gen period = 1
replace period = 2 if year_built == 1978 | year_built == 1979
replace period = 3 if year_built > 1979

** Note: In models, exlcude controls for 3 bedroom, 1750 sqft, single floor homes built pre-1978 and not on electric heat

char period[omit] 1
char bedrooms_num[omit] 3
char sqft[omit] 6
char multi[omit] 0
char electric_heat[omit] 0


** Avg Daily Temp thresholds for segmented linear model

local thresh1=52
local thresh2=62
local thresh3=72

mkspline temp_avg_1 `thresh1' temp_avg_2 `thresh2' temp_avg_3 `thresh3' temp_avg_4 = temp_avg

save Temp/Pooled_dataset.dta, replace

********************************************************************************
** Figure 5 and Table A12: Daily Pooled Model

use Temp/Pooled_dataset.dta, clear

xi: xtivreg2 kwh_daily temp_avg_1 temp_avg_2 temp_avg_3 temp_avg_4 i.period*temp_avg_1 i.period*temp_avg_2 i.period*temp_avg_3 i.period*temp_avg_4 i.bedrooms_num*temp_avg_1 i.bedrooms_num*temp_avg_2 i.bedrooms_num*temp_avg_3 i.bedrooms_num*temp_avg_4 i.sqft*temp_avg_1 i.sqft*temp_avg_2 i.sqft*temp_avg_3 i.sqft*temp_avg_4 i.multi*temp_avg_1 i.multi*temp_avg_2 i.multi*temp_avg_3 i.multi*temp_avg_4 i.electric_heat*temp_avg_1 i.electric_heat*temp_avg_2 i.electric_heat*temp_avg_3 i.electric_heat*temp_avg_4, fe cluster(premise_id week_year)

matrix define beta = e(b)'
matrix define var = e(V)
matrix define var = vecdiag(var)

foreach p in 2 3 {
	
	lincom temp_avg_1 + _IperXtemp__`p'
	scalar define slope_1_per`p' = r(estimate)
	scalar define se_1_per`p' = r(se)
	
	lincom temp_avg_2 + _IperXtemp_a`p'
	scalar define slope_2_per`p' = r(estimate)
	scalar define se_2_per`p' = r(se)
	
	lincom temp_avg_3 + _IperXtemp_b`p'
	scalar define slope_3_per`p' = r(estimate)
	scalar define se_3_per`p' = r(se)
	
	lincom temp_avg_4 + _IperXtemp_c`p'
	scalar define slope_4_per`p' = r(estimate)
	scalar define se_4_per`p' = r(se)

}

foreach j in slope se {
	forvalues t = 1/4 {	
		matrix define `j'_`t' = [`j'_`t'_per2, `j'_`t'_per3] 
	}
	matrix define `j' = [`j'_1 \ `j'_2 \ `j'_3 \ `j'_4]
}

clear
svmat beta /* Coefficient estimates from pooled model */
svmat var /* variance of coefficient estimates */
svmat slope /* Col. vectors of slopes of linear spline segments by pre, during, and post adoption periods */
svmat se /* Std. Errors of slope estimates from linear spline segments */

** Note: the above slope and std. error estimates are displayed in the first
** columns of Table A12. The "Difference" columns in Table A12 display the beta
** point estimates (and the square root of the variance estimates) for the
** coefficients on the interactions between the temperature spline and the
** indicators for periods 2 and 3.

** Note: Figure 5 in the main text assumes that zero elctricity is used for
** temperature control when the average daily temperature is 62 F. The left panel
** uses the slope coefficients for the spline from the pre and post-adoption periods
** to predict the average daily electricity used for temperature control based
** on the average daily temperature. The right panel uses the beta point
** estimates on the interaction between the temperature spline and period 3
** indicator (post-adoption) to estimate the difference between the predicted
** electricty usage for temperature control.


********************************************************************************
** Table A13: Daily Pooled Model with monthly fixed effects

use Temp/Pooled_dataset.dta, clear

gen month_year = year*100 + month

xi: xtivreg2 kwh_daily temp_avg_1 temp_avg_2 temp_avg_3 temp_avg_4 i.period*temp_avg_1 i.period*temp_avg_2 i.period*temp_avg_3 i.period*temp_avg_4 i.bedrooms_num*temp_avg_1 i.bedrooms_num*temp_avg_2 i.bedrooms_num*temp_avg_3 i.bedrooms_num*temp_avg_4 i.sqft*temp_avg_1 i.sqft*temp_avg_2 i.sqft*temp_avg_3 i.sqft*temp_avg_4 i.multi*temp_avg_1 i.multi*temp_avg_2 i.multi*temp_avg_3 i.multi*temp_avg_4 i.electric_heat*temp_avg_1 i.electric_heat*temp_avg_2 i.electric_heat*temp_avg_3 i.electric_heat*temp_avg_4 i.month_year, fe cluster(premise_id week_year)

matrix define beta_month = e(b)'
matrix define var_month = e(V)
matrix define var_month = vecdiag(var)

foreach p in 2 3 {
	
	lincom temp_avg_1 + _IperXtemp__`p'
	scalar define slope_1_per`p' = r(estimate)
	scalar define se_1_per`p' = r(se)
	
	lincom temp_avg_2 + _IperXtemp_a`p'
	scalar define slope_2_per`p' = r(estimate)
	scalar define se_2_per`p' = r(se)
	
	lincom temp_avg_3 + _IperXtemp_b`p'
	scalar define slope_3_per`p' = r(estimate)
	scalar define se_3_per`p' = r(se)
	
	lincom temp_avg_4 + _IperXtemp_c`p'
	scalar define slope_4_per`p' = r(estimate)
	scalar define se_4_per`p' = r(se)

}

foreach j in slope se {
	forvalues t = 1/4 {	
		matrix define `j'_`t' = [`j'_`t'_per2, `j'_`t'_per3] 
	}
	
	matrix define `j'_month = [`j'_1 \ `j'_2 \ `j'_3 \ `j'_4]
	
}

clear
svmat beta_month /* Coefficient estimates from pooled model */
svmat var_month /* variance of coefficient estimates */
svmat slope_month /* Col. vectors of slopes of linear spline segments by pre, during, and post adoption periods */
svmat se_month /* Std. Errors of slope estimates from linear spline segments */

** Note: the above slope and std. error estimates are displayed in the first
** columns of Table A13. The "Difference" columns in Table A13 display the beta
** point estimates (and the square root of the variance estimates) for the
** coefficients on the interactions between the temperature spline and the
** indicators for periods 2 and 3.


********************************************************************************
** Figures 6 and A6: Hourly Pooled Model

forvalues i = 1/24 {

	use Temp/Pooled_dataset.dta, clear	
	
	xi: xtivreg2 kwh_`i' temp_avg_1 temp_avg_2 temp_avg_3 temp_avg_4 i.period*temp_avg_1 i.period*temp_avg_2 i.period*temp_avg_3 i.period*temp_avg_4 i.bedrooms_num*temp_avg_1 i.bedrooms_num*temp_avg_2 i.bedrooms_num*temp_avg_3 i.bedrooms_num*temp_avg_4 i.sqft*temp_avg_1 i.sqft*temp_avg_2 i.sqft*temp_avg_3 i.sqft*temp_avg_4 i.multi*temp_avg_1 i.multi*temp_avg_2 i.multi*temp_avg_3 i.multi*temp_avg_4 i.electric_heat*temp_avg_1 i.electric_heat*temp_avg_2 i.electric_heat*temp_avg_3 i.electric_heat*temp_avg_4, fe cluster(premise_id week_year)
	
	foreach p in 2 3 {
	
		lincom temp_avg_1 + _IperXtemp__`p'
		scalar define slope_1_per`p' = r(estimate)
		scalar define se_1_per`p' = r(se)
	
		lincom temp_avg_2 + _IperXtemp_a`p'
		scalar define slope_2_per`p' = r(estimate)
		scalar define se_2_per`p' = r(se)
	
		lincom temp_avg_3 + _IperXtemp_b`p'
		scalar define slope_3_per`p' = r(estimate)
		scalar define se_3_per`p' = r(se)
	
		lincom temp_avg_4 + _IperXtemp_c`p'
		scalar define slope_4_per`p' = r(estimate)
		scalar define se_4_per`p' = r(se)

	}

	foreach j in slope se {
		forvalues t = 1/4 {	
			matrix define `j'_`t'_h`i' = [`j'_`t'_per2, `j'_`t'_per3] 
		}
	
		matrix define `j'_h`i' = [`j'_1_h`i' \ `j'_2_h`i' \ `j'_3_h`i' \ `j'_4_h`i']
	
	}
	
	matrix define beta_h`i' = e(b)'
	matrix define var_h`i' = e(V)
	
}

clear

forvalues i = 1/24 {

	svmat slope_h`i'
	svmat se_h`i'
	svmat beta_h`i'
	svmat var_h`i'

}

** Note: Figure A6 displays the 24*4 beta_h`i' point estimates on the
** interactions between the 4 temperature spline variables and the period 3
** (post-adoption) indicator.

** Note: Figure 6 uses the 24 beta_h`i' point estimates and var_h`i' values for
** the interaction between the temperature spline and the post-adoption
** indicator to predict the change in hourly average electricity use at average
** daily temperatures of 68 and 80.


********************************************************************************
** Table A14: Daily Pooled Model by Year-Built

use Temp/Pooled_dataset.dta, clear

char year_built[omit] 1977

xi: xtivreg2 kwh_daily temp_avg_1 temp_avg_2 temp_avg_3 temp_avg_4 i.year_built*temp_avg_1 i.year_built*temp_avg_2 i.year_built*temp_avg_3 i.year_built*temp_avg_4 i.bedrooms_num*temp_avg_1 i.bedrooms_num*temp_avg_2 i.bedrooms_num*temp_avg_3 i.bedrooms_num*temp_avg_4 i.sqft*temp_avg_1 i.sqft*temp_avg_2 i.sqft*temp_avg_3 i.sqft*temp_avg_4 i.multi*temp_avg_1 i.multi*temp_avg_2 i.multi*temp_avg_3 i.multi*temp_avg_4 i.electric_heat*temp_avg_1 i.electric_heat*temp_avg_2 i.electric_heat*temp_avg_3 i.electric_heat*temp_avg_4, fe cluster(premise_id week_year)

foreach y in 75 76 78 79 80 81 81 82 {
	
	lincom temp_avg_1 + _IyeaXtem_19`y'
	scalar define slope_1_y19`y' = r(estimate)
	scalar define se_1_y19`y' = r(se)
	
	lincom temp_avg_2 + _IyeaXtema19`y'
	scalar define slope_2_y19`y' = r(estimate)
	scalar define se_2_y19`y' = r(se)
	
	lincom temp_avg_3 + _IyeaXtemb19`y'
	scalar define slope_3_y19`y' = r(estimate)
	scalar define se_3_y19`y' = r(se)
	
	lincom temp_avg_4 + _IyeaXtemc19`y'
	scalar define slope_4_y19`y' = r(estimate)
	scalar define se_4_y19`y' = r(se)

}

foreach j in slope se {
	forvalues t = 1/4 {	
		matrix define `j'_`t'_daily = [`j'_`t'_y1975, `j'_`t'_y1976, `j'_`t'_y1978, `j'_`t'_y1979, `j'_`t'_y1980, `j'_`t'_y1981, `j'_`t'_y1982] 
	}

	matrix define `j'_daily = [`j'_1_daily \ `j'_2_daily \ `j'_3_daily \ `j'_4_daily]
	
}

matrix define beta = e(b)'
matrix define var = e(V)

clear
svmat slope_daily
svmat se_daily
svmat beta
svmat var

** Note: the above slope and std. error estimates are displayed in the first
** columns of Table A14. The "Difference" columns in Table A14 display the beta
** point estimates (and the square root of the variance estimates) for the
** coefficients on the interactions between the temperature spline and the
** indicators for year_built.


********************************************************************************
** Figure A10 and Table A16: Daily Pooled Model -- Electric Heat Homes

use Temp/Pooled_dataset.dta, clear

keep if electric_heat == 1

xi: xtivreg2 kwh_daily temp_avg_1 temp_avg_2 temp_avg_3 temp_avg_4 i.period*temp_avg_1 i.period*temp_avg_2 i.period*temp_avg_3 i.period*temp_avg_4 i.bedrooms_num*temp_avg_1 i.bedrooms_num*temp_avg_2 i.bedrooms_num*temp_avg_3 i.bedrooms_num*temp_avg_4 i.sqft*temp_avg_1 i.sqft*temp_avg_2 i.sqft*temp_avg_3 i.sqft*temp_avg_4 i.multi*temp_avg_1 i.multi*temp_avg_2 i.multi*temp_avg_3 i.multi*temp_avg_4, fe cluster(premise_id week_year)


foreach p in 2 3 {
	
	lincom temp_avg_1 + _IperXtemp__`p'
	scalar define slope_1_per`p' = r(estimate)
	scalar define se_1_per`p' = r(se)
	
	lincom temp_avg_2 + _IperXtemp_a`p'
	scalar define slope_2_per`p' = r(estimate)
	scalar define se_2_per`p' = r(se)
	
	lincom temp_avg_3 + _IperXtemp_b`p'
	scalar define slope_3_per`p' = r(estimate)
	scalar define se_3_per`p' = r(se)
	
	lincom temp_avg_4 + _IperXtemp_c`p'
	scalar define slope_4_per`p' = r(estimate)
	scalar define se_4_per`p' = r(se)

}

foreach j in slope se {
	forvalues t = 1/4 {	
		matrix define `j'_`t'_daily = [`j'_`t'_per2, `j'_`t'_per3] 
	}

	matrix define `j'_daily = [`j'_1_daily \ `j'_2_daily \ `j'_3_daily \ `j'_4_daily]
	
}


matrix define beta = e(b)'
matrix define var = e(V)

clear
svmat beta /* Coefficient estimates from pooled model */
svmat var /* variance of coefficient estimates */
svmat slope_daily /* Col. vectors of slopes of linear spline segments by pre, during, and post adoption periods */
svmat se_daily /* Std. Errors of slope estimates from linear spline segments */

** Note: the above slope and std. error estimates are displayed in the first
** columns of Table A16. The "Difference" columns in Table A16 display the beta
** point estimates (and the square root of the variance estimates) for the
** coefficients on the interactions between the temperature spline and the
** indicators for periods 2 and 3.

** Note: Figure A10 assumes that zero elctricity is used for
** temperature control when the average daily temperature is 62 F. The left panel
** uses the slope coefficients for the spline from the pre and post-adoption periods
** to predict the average daily electricity used for temperature control based
** on the average daily temperature. The right panel uses the beta point
** estimates on the interaction between the temperature spline and period 3
** indicator (post-adoption) to estimate the difference between the predicted
** electricty usage for temperature control.

